package com.kwum.ttkuaibao;

import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.kwum.TimeUtils;

import net.sf.json.JSONArray;
import net.sf.json.JSONObject;

/**
 * @Description: Crawler that fetches Tiantian Kuaibao article lists per channel and stores them in the database.
 * @Company: yimo
 * @author: Kwum
 * @date 2018年2月27日 上午9:31:13
 */

public class Spirder {

	/**
	 * Channels to crawl, as {category display name, chlid request value} pairs.
	 * The display name is stored with every article; the chlid is sent to the server.
	 */
	private static final String[][] CHANNELS = {
		{"猎奇", "kb_news_curious"},
		{"电视剧", "kb_news_julebu"},
		{"娱乐", "kb_news_bagua"},
		{"情感", "kb_news_sex"},
		{"美女", "kb_news_beauty"},
		{"汽车", "kb_news_car"},
		{"军事", "kb_news_mil"},
		{"财经", "kb_news_finance"},
		{"体育", "kb_news_sports"},
		{"动漫", "kb_news_erciyuan"},
		{"科技", "kb_news_tech"},
		{"房产", "kb_news_house"},
		{"国际", "kb_news_world"},
		{"历史", "kb_news_history"},
		{"搞笑", "kb_news_laugh"},
		{"健康", "kb_news_jiankang"},
		{"电竞", "kb_news_esport"},
		{"职场", "kb_news_career"},
		{"育儿", "kb_news_baby"},
		{"减肥", "kb_news_diet"},
		{"创意", "kb_news_idea"},
		{"旅游", "kb_news_travel"},
		{"家居", "kb_news_furnishing"},
		{"美食", "kb_news_cate"},
		{"教育", "kb_news_edu"},
		{"三农", "kb_news_farmer"},
		{"涨知识", "kb_news_science"},
		{"星座", "kb_news_astro"},
		{"游戏", "kb_news_game"},
		{"时尚", "kb_news_chaobao"},
		{"养生", "kb_news_health"},
		{"热点", "kb_news_hotnews"},
		{"萌宠", "kb_news_pet"},
		{"电影", "kb_news_movie"},
		{"健身", "kb_news_workout"},
	};

	/** Number of paging requests issued per channel after the initial request. */
	private static final int MAX_PAGES_PER_CHANNEL = 50;

	/** Pause before every paging request, in milliseconds. */
	private static final long REQUEST_INTERVAL_MILLIS = 3 * 1000L;

	/**
	 * Mutable paging state that is echoed back to the server on each request.
	 * All values start out as they are sent on the very first request of a channel
	 * (empty cursors, all counters zero).
	 */
	private static final class PagingCursor {
		String lastRefreshTime = "";
		String topId = "";
		String topTime = "";
		String bottomId = "";
		String bottomTime = "";
		int manualRefresh = 0;
		int chRefreshTimes = 0;
		// NOTE(review): page is never advanced (the original code always sent 0) — confirm against the API.
		int page = 0;
		int forward = 0;
	}

	/**
	 * Entry point: crawls every configured channel.
	 *
	 * @param args unused
	 * @throws Exception if a request, the JSON handling or the database insert fails
	 */
	public static void main(String[] args) throws Exception {
		spirdeTianTianKuaiBaoArticleListByThreme();
	}

	/**
	 * Crawls the article list of every channel in {@link #CHANNELS}, one after the other,
	 * and stores each article through {@code MySQLUtils.insertKuaiBaoNewList}.
	 *
	 * @throws Exception if a request, the JSON handling or the database insert fails;
	 *                   the remaining channels are not crawled in that case
	 */
	public static void spirdeTianTianKuaiBaoArticleListByThreme() throws Exception {
		for (String[] channel : CHANNELS) {
			crawlChannel(channel[0], channel[1]);
		}
	}

	/**
	 * Crawls one channel: one initial request followed by up to
	 * {@link #MAX_PAGES_PER_CHANNEL} paging requests. Paging for the channel stops early
	 * when the server returns no news items.
	 *
	 * @param category display name stored with every article of this channel
	 * @param chlid    channel id sent to the server
	 * @throws Exception if a request, the JSON handling or the database insert fails
	 */
	private static void crawlChannel(String category, String chlid) throws Exception {
		// Taken once per channel and reused for every request of that channel.
		long currentTime = TimeUtils.string2Timestamp(TimeUtils.getNow());
		PagingCursor cursor = new PagingCursor();

		for (int request = 0; request <= MAX_PAGES_PER_CHANNEL; request++) {
			boolean firstRequest = request == 0;
			if (!firstRequest) {
				// Wait before every refresh / page turn.
				Thread.sleep(REQUEST_INTERVAL_MILLIS);
			}

			JSONObject response = fetch(buildRequestUrl(chlid, currentTime, cursor));
			if (firstRequest) {
				System.out.println(response.toString());
			} else {
				System.out.println(category + request + "..." + "翻页..." + response.toString());
			}

			JSONArray newsList = response.optJSONArray("newslist");
			if (newsList == null || newsList.size() == 0) {
				// Nothing (more) to read for this channel; continue with the next one.
				break;
			}
			JSONObject changeInfo = response.optJSONObject("changeInfo");
			JSONArray commentCounts = changeInfo == null ? null : changeInfo.optJSONArray("subIdComments");

			// Remember the top of this batch and switch to "paging" parameters.
			JSONObject firstItem = newsList.optJSONObject(0);
			if (firstItem != null) {
				cursor.topId = firstItem.optString("id");
				cursor.topTime = String.valueOf(firstItem.optLong("timestamp"));
			}
			cursor.lastRefreshTime = String.valueOf(currentTime);
			cursor.forward = 1;
			cursor.manualRefresh = 1;
			if (firstRequest) {
				cursor.chRefreshTimes++;
			}

			for (int i = 0; i < newsList.size(); i++) {
				JSONObject item = newsList.getJSONObject(i);

				// Only plain articles (articletype "0") are stored.
				if (StringUtils.equals("0", item.optString("articletype", ""))) {
					TianTianKuaiBao kuaiBao = new TianTianKuaiBao(item.getString("id"), item.getString("title"),
							item.getString("url"), item.getString("source"), item.getString("time"),
							commentCountAt(commentCounts, i), category);
					MySQLUtils.insertKuaiBaoNewList(kuaiBao);
				}

				// The last item seen becomes the bottom cursor for the next page.
				cursor.bottomId = item.getString("id");
				cursor.bottomTime = String.valueOf(item.getLong("timestamp"));
			}
		}
	}

	/**
	 * Returns the comment count stored at {@code index} of the {@code subIdComments} array.
	 * NOTE(review): assumes subIdComments is index-aligned with newslist, as the original
	 * code did — confirm against the API.
	 *
	 * @param commentCounts the {@code subIdComments} array, may be {@code null}
	 * @param index         index of the news item
	 * @return the {@code comments} value, or 0 when no entry exists for {@code index}
	 */
	private static int commentCountAt(JSONArray commentCounts, int index) {
		if (commentCounts == null || index >= commentCounts.size()) {
			return 0;
		}
		JSONObject entry = commentCounts.optJSONObject(index);
		return entry == null ? 0 : entry.optInt("comments", 0);
	}

	/**
	 * POSTs to the given URL and parses the response text as a JSON object.
	 *
	 * @param url full request URL including the query string
	 * @return the parsed response
	 * @throws Exception if the request fails or the response is not a JSON object
	 */
	private static JSONObject fetch(String url) throws Exception {
		Document document = Jsoup.connect(url)
				.ignoreContentType(true)
				.post();
		return JSONObject.fromObject(document.text());
	}

	/**
	 * Builds the request URL for one list request of the given channel.
	 * NOTE(review): the query string embeds what look like session credentials
	 * (lskey / skey / uin) and device identifiers; consider moving them to configuration.
	 *
	 * @param chlid       channel id to request
	 * @param currentTime timestamp of the crawl; sent multiplied by 1000 in the REQExecTime,
	 *                    unixtimesign and REQBuildTime parameters
	 * @param cursor      paging state to send
	 * @return the complete URL
	 */
	private static String buildRequestUrl(String chlid, long currentTime, PagingCursor cursor) {
		return "https://r.cnews.qq.com/getSubNewsChlidInterest?devid=863064010002246" +
				"&provinceId=25&loc_catalog=&lon=103.56548&cityList=&loc_addr=&cityId=307&userCity=&loc_street=Unknown&adcode=530326&loc_name=" +
				"&loc_accuracy=2000.0&lat=26.997655&loc_streetNo=Unknown&omgbizid=&Cookie=%26lskey%3D00030000097856e6647ac7faded0d0de4c184358f2497b29ab62cff2a2561a073984d3e22bd1af5d8b329b06%26luin%3Do0787207525%26skey%3DM0WMGUOPXo%26uin%3Do0787207525%26logintype%3D0&direction=1&qn-sig=f195e53b957230a058709b1cb78185fd&activefrom=icon&luin=o0787207525&uin=o0787207525" +
				"&last_time=" + cursor.lastRefreshTime +
				"&top_time=" + cursor.topTime +
				"&chlid=" + chlid +
				"&manualRefresh=" + cursor.manualRefresh +
				"&REQExecTime=" + (currentTime * 1000) +
				"&bottom_id=" + cursor.bottomId +
				"&qqnetwork=wifi" +
				"&chRefreshTimes=" + cursor.chRefreshTimes +
				"&unixtimesign=" + (currentTime * 1000) +
				"&imsi_history=460070022413868" +
				"&page=" + cursor.page +
				"&forward=" + cursor.forward +
				"&commonGray=1_3%7C2_1%7C11_0%7C12_1%7C10_1&ssid=HWMLAa" +
				"&top_id=" + cursor.topId +
				"&lastRefreshTime=" + cursor.lastRefreshTime +
				"&refresh_from=refresh_init" +
				"&REQBuildTime=" + (currentTime * 1000) +
				"&qn-rid=eab19fbe-503c-4478-89b2-7783b0d03766&currentTab=kuaibao&qimei=863064010002246&sessionid=&muid=140513080316233034&refreshType=normal&bssid=00%3AE0%3A8A%3A68%3A07%3AB2" +
				"&last_id=" + cursor.bottomId +
				"&bottom_time=" + cursor.bottomTime +
				"&is_wap=0&imsi=460070022413868&omgid=&cachedCount=0&uid=00e08a6807b24642&store=74213&hw=HUAWEI%20_HUAWEIMLA-AL10&devid=863064010002246&appversion=4.6.60&screen_width=1080&hw_fp=HUAWEI%2FMLA-AL10%2FHWMLA%3A4.4.2%2FHUAWEIMLA-AL10%2F381180209%3Auser%2Frelease-keys&mac=00%3AE0%3A8A%3A68%3A07%3AB2&appver=19_areading_4.6.60&android_id=00e08a6807b24642&origin_imei=863064010002246&sceneid=&mid=4df52519480657c9bd6ecd4df58021a6700fab24&apptype=android&screen_height=1920";
	}
}
